knitr::opts_knit$set(root.dir = '~/Documents/bgs_sims/')

Simulated data from \(N_e=10,000\) with bottleneck and growth following Beissinger et al. 2016. Munged data will be neutral \(\pi\) and \(\xi_1\) (singletons) shown every 100 generations in each of 24 4Kb windows centered around a 4kb “gene” which has neutral () and deleterious () mutations. Deleterious mutations are drawn from a gamma with some mean s (see below).

Boring Libraries and functions

library(tidyverse)
library(ggjoy)
library(viridis)
library(cowplot)

Europeans

For Europeans, we follow a slightly modified model of Torres et al. 2017 but the gene model is as above. Our demographic model is

read_sim<-function(pidata,xidata,neutral=FALSE){
  #read data
  pi<-as.tibble(read.csv(pidata,header=F))
  pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
  xi<-as.tibble(read.csv(xidata,header=F)) 
  xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
  #munge into single tibble of means
  data<-rbind(xi,pi) %>%
    `colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
    gather(window,value,2:26) %>% 
    mutate(gen=gen-100000) %>%
    group_by(gen,stat,window) %>%
    summarize(mean_val=mean(value)) %>%
    mutate(window=(as.numeric(window)-13)*4000*3E-5,mean_val=mean_val)
    data=mutate(data,mean_val=if_else(window==0.00,mean_val*4,mean_val))
  if(neutral==TRUE){
    data=mutate(data,mean_val=if_else(window==0.00,mean_val/4,mean_val))
  }

  return(data)
}
simplot<-function(somedata,mystat,label,bneck){
  ggplot(filter(somedata,gen %% 100 <1,stat==mystat,gen>bneck), aes(x=window, y=mean_val, group = gen,color=gen)) + 
  scale_color_viridis(name='Generation') +   
  background_grid(major = "xy", minor = "none") +
  geom_line() + 
  ylab(label)  + xlab("cM") +
  scale_x_continuous(breaks = seq(-1.5,1.5,0.5))
}
merge_sel_neutral <- function(sel_sim_data,neut_sim_data){
  merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','stat','window'))
  merged$mean_val <- merged$mean_val.x/merged$mean_val.y
  merged
}
setwd('~/Documents/bgs_sims/')

Neutral Sims

maize_neutral <-read_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
head(maize_neutral)
## Source: local data frame [6 x 4]
## Groups: gen, stat [1]
## 
## # A tibble: 6 x 4
##     gen  stat window mean_val
##   <dbl> <chr>  <dbl>    <dbl>
## 1   200    pi  -1.44 47.63346
## 2   200    pi  -0.36 48.47615
## 3   200    pi  -0.24 46.79799
## 4   200    pi  -0.12 48.13605
## 5   200    pi   0.00 47.08144
## 6   200    pi   0.12 47.98168
unique(maize_neutral$window)
##  [1] -1.44 -0.36 -0.24 -0.12  0.00  0.12  0.24  0.36  0.48  0.60  0.72
## [12] -1.32  0.84  0.96  1.08  1.20  1.32  1.44 -1.20 -1.08 -0.96 -0.84
## [23] -0.72 -0.60 -0.48
simplot(maize_neutral,"pi",expression(bar(pi)),2200)

European_neutral <-read_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
simplot(European_neutral,"pi",expression(bar(pi)),2200)

Weak selection \(s=5\times 10^{-5}\)

Maize

maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)

\(\pi\)

simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)

\(\xi_1\)

simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)

Human

European<-read_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged <- merge_sel_neutral(European,European_neutral)

\(\pi\)

simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)

\(\xi_1\)

simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)

Intermediate selection \(s=5\times 10^{-4}\)

Maize

maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)

\(\pi\)

simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)

\(\xi_1\)

simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)

Human

European<-read_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged <- merge_sel_neutral(European,European_neutral)

\(\pi\)

simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)

\(\xi_1\)

simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)

Strong selection \(s=5\times 10^{-3}\)

Maize

maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-3.csv","sims/results/sim_singleton_maize_s5E-3.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)

\(\pi\)

simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)

\(\xi_1\)

simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)

Human

European<-read_sim("sims/results/sim_neutralPi_European_s5E-3.csv","sims/results/sim_singleton_European_s5E-3.csv")
European_merged <- merge_sel_neutral(European,European_neutral)

\(\pi\)

simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)

\(\xi_1\)

simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)

Anova to test effect of generation on \(\pi\) and \(\xi\)

read_single_sim<-function(pidata,xidata,neutral=FALSE){
  #read data
  pi<-as.tibble(read.csv(pidata,header=F))
  pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
  xi<-as.tibble(read.csv(xidata,header=F)) 
  xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
  #munge into single tibble of means
  data<-rbind(xi,pi) %>%
    `colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
    gather(window,value,2:26) %>% 
    mutate(gen=gen-100000) %>%
    mutate(window=(as.numeric(window)-13)*4000*3E-5)
    data=mutate(data,value=if_else(window==0.00,value*4,value))
  if(neutral==TRUE){
    data=mutate(data,value=if_else(window==0.00,value/4,value))
  }
  return(data)
}
merge_single_sel_neutral <- function(sel_sim_data,neut_sim_data){
  merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','sim','stat','window'))
  merged$value <- merged$value.x/merged$value.y
  merged
}
sim_anova <- function(somedata,stats){
sep_parm <- filter(somedata,stat==stats,!is.na(value),value != Inf)
sep_parm$gen <- as.factor(sep_parm$gen)
sep_parm$window <- as.factor(sep_parm$window)
fit <- lm(value ~ gen + window + gen*window, data=sep_parm,na.action = na.exclude)

print(anova(fit))
}
maize_neutral <-read_single_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
European_neutral <-read_single_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)

\(s=5\times 10^{-4}\)

Anova \(\pi\)

Maize

maize<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_merged <- merge_single_sel_neutral(maize,maize_neutral)
sim_anova(maize_merged,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq   F value Pr(>F)    
## gen           32    0.6   0.018    0.2866      1    
## window        24 5592.5 233.021 3722.7011 <2e-16 ***
## gen:window   768    8.1   0.010    0.1675      1    
## Residuals  81675 5112.4   0.063                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Humans

European<-read_single_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged <- merge_single_sel_neutral(European,European_neutral)
sim_anova(European_merged,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq   F value Pr(>F)    
## gen           32    2.7   0.084    0.9819 0.4957    
## window        24 5714.5 238.104 2784.8759 <2e-16 ***
## gen:window   768   19.4   0.025    0.2950 1.0000    
## Residuals  81675 6983.1   0.085                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova \(\xi\)

Maize

sim_anova(maize_merged,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value    Pr(>F)    
## gen           32   1874  58.556 138.8111 < 2.2e-16 ***
## window        24     47   1.955   4.6348 3.514e-13 ***
## gen:window   768    382   0.497   1.1781 0.0004822 ***
## Residuals  81519  34388   0.422                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Humans

sim_anova(European_merged,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df  Sum Sq Mean Sq F value    Pr(>F)    
## gen           32   166.5  5.2037 20.1613 < 2.2e-16 ***
## window        24    32.4  1.3508  5.2337 9.943e-16 ***
## gen:window   768   182.5  0.2377  0.9209    0.9418    
## Residuals  81675 21080.8  0.2581                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(s=5\times 10^{-5}\)

Anova \(\pi\)

Maize

maize<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_merged <- merge_single_sel_neutral(maize,maize_neutral)
sim_anova(maize_merged,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value Pr(>F)    
## gen           32    1.4   0.043   0.5005 0.9917    
## window        24 1918.5  79.938 926.3525 <2e-16 ***
## gen:window   768   11.1   0.014   0.1672 1.0000    
## Residuals  81675 7048.0   0.086                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Humans

European<-read_single_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged <- merge_single_sel_neutral(European,European_neutral)
sim_anova(European_merged,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value  Pr(>F)    
## gen           32    6.6   0.205   1.7479 0.00553 ** 
## window        24 1889.0  78.708 670.7534 < 2e-16 ***
## gen:window   768   19.5   0.025   0.2164 1.00000    
## Residuals  81675 9583.9   0.117                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova \(\xi\)

Maize

sim_anova(maize_merged,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value Pr(>F)    
## gen           32   2195  68.594 146.0606 <2e-16 ***
## window        24     10   0.407   0.8665 0.6507    
## gen:window   768    338   0.440   0.9372 0.8915    
## Residuals  81519  38283   0.470                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Humans

sim_anova(European_merged,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df  Sum Sq Mean Sq F value    Pr(>F)    
## gen           32   142.5  4.4520 16.4619 < 2.2e-16 ***
## window        24    25.8  1.0749  3.9745 1.846e-10 ***
## gen:window   768   190.9  0.2486  0.9193    0.9454    
## Residuals  81675 22088.4  0.2704                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1